Vamos a cargar el archivo de datos

# Path to the salaries CSV, relative to the working directory.
# The previous literal ended in a space, which is not part of the file name
# and makes the read fail on file systems that do not strip trailing blanks.
data_url <- "ds_salaries.csv"
salaries <- read.csv(data_url)

# Install dplyr only when it is not available, then attach it.
# requireNamespace() checks for the package without attaching it, so the
# single library() call below is the only place where dplyr gets loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Swap the two-letter experience codes for human-readable labels.
salaries <- mutate(
  salaries,
  experience_level = recode(
    experience_level,
    EN = "Entry-Level",
    MI = "Mid-Level",
    SE = "Senior",
    EX = "Executive"
  )
)

1 ¿Cómo afecta el país del empleado al sueldo?

library(countrycode)
## Warning: package 'countrycode' was built under R version 4.2.3
# Convert employee_residence from ISO 3166 alpha-2 to alpha-3 codes.
# warn = FALSE silences countrycode's warning about unmatched values.
salaries <- mutate(
  salaries,
  employee_residence = countrycode(
    sourcevar = employee_residence,
    origin = "iso2c",
    destination = "iso3c",
    warn = FALSE
  )
)
library(dplyr)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Mean salary in USD per employee country of residence.
salario_medio_por_pais <- salaries %>%
  group_by(employee_residence) %>%
  summarize(Salario_Medio = mean(salary_in_usd, na.rm = TRUE)) %>%
  # Israel is removed as an outlier. Rows whose country code is NA are
  # dropped here too, because `!=` evaluates to NA for them.
  filter(employee_residence != "ISR")

# World choropleth of the mean salary per country.
# The colour bar is configured on the trace: `colorbar` is not a layout
# attribute, so passing it to layout() is ignored with a warning. The
# "Salary" caption is attached to the colour bar, since a choropleth trace
# has no legend entry on which a legend title could appear.
fig <- plot_ly(
  data = salario_medio_por_pais,
  type = "choropleth",
  locationmode = "ISO-3",
  locations = ~employee_residence,
  z = ~Salario_Medio,
  colorbar = list(
    title = list(text = "Salary"),
    tickmode = "array",
    tickvals = c(0, 170000),
    ticktext = c("0", "170k")
  )
) %>%
  layout(
    title = "Salary per Country",
    geo = list(
      scope = "world",
      showframe = FALSE,
      projection = list(type = "mercator")
    )
  )

fig

2 ¿Cómo afecta el tamaño de la compañía al sueldo? ¿Y qué diferencias de salarios podemos ver entre los distintos roles y niveles formativos?

# Number of rows per company location, most frequent first.
employees_by_country <- sort(
  table(salaries$company_location),
  decreasing = TRUE
)

# Show the five locations with the most rows.
head(employees_by_country, n = 5)
## 
##   US   GB   CA   ES   IN 
## 3040  172   87   77   58

Debido a la gran diferencia en el número de empleos entre países, no tendría sentido comparar los datos entre ellos: la falta de muestras en la mayoría de los países impediría una comparación equilibrada. Por ello, vamos a trabajar únicamente con los datos de US.

library(dplyr)
library(plotly)

# Keep only the rows whose company is located in the US.
nuevo_dataset_us <- subset(salaries, company_location == "US")

# Mean salary in USD for every experience level / company size pair.
# na.rm = TRUE matches the per-country mean computed earlier, and
# .groups = "drop" returns an ungrouped table (and silences the
# "grouped output" message from summarise()).
media_salarios <- nuevo_dataset_us %>%
  group_by(experience_level, company_size) %>%
  summarize(
    Media_Salario = mean(salary_in_usd, na.rm = TRUE),
    .groups = "drop"
  )

# Grouped bar chart: one bar per company size within each experience level.
# The x axis is ordered by seniority; the default order is alphabetical,
# which would place "Executive" before "Mid-Level".
fig <- plot_ly(
  media_salarios,
  x = ~experience_level,
  y = ~Media_Salario,
  color = ~company_size,
  type = "bar"
) %>%
  layout(
    title = "Salary per Experience Level and Company Size",
    xaxis = list(
      title = "Experience",
      categoryorder = "array",
      categoryarray = c("Entry-Level", "Mid-Level", "Senior", "Executive")
    ),
    yaxis = list(title = "Salary (USD)")
  )

fig
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
library(dplyr)
library(plotly)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Mean salary in USD per year and job title for US companies.
media_salarios_por_title <- nuevo_dataset_us %>%
  group_by(work_year, job_title) %>%
  summarize(
    Media_Salario = mean(salary_in_usd, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Add an explicit NA row for every year/title pair without data. This is
  # what the former pivot_wider() + melt() round trip produced; the NA rows
  # leave a gap in a title's line for years where it has no observations.
  # Note: job_title stays a character column here (melt() made it a factor).
  complete(work_year, job_title)

# One line per job title showing how its mean salary changes over the years.
# - width/height are passed to plot_ly(): setting them in layout() is
#   deprecated.
# - colors = "Set2" names the palette explicitly so plotly interpolates it
#   when there are more job titles than the 8 colours Set2 provides, rather
#   than requesting an oversized palette from RColorBrewer.
fig <- plot_ly(
  data = media_salarios_por_title,
  x = ~work_year,
  y = ~Media_Salario,
  color = ~job_title,
  colors = "Set2",
  type = "scatter",
  mode = "lines+markers",
  width = 800,
  height = 600
) %>%
  layout(
    title = "Salary Evolution per Job Title",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Salary (USD)")
  )

fig